Source Code of com.python.pydev.refactoring.refactorer.search.copied.PatternConstructor

/**
 * Copyright (c) 2005-2011 by Appcelerator, Inc. All Rights Reserved.
 * Licensed under the terms of the Eclipse Public License (EPL).
 * Please see the license.txt included with this distribution for details.
 * Any modifications to this file must keep this entire header intact.
 */
package com.python.pydev.refactoring.refactorer.search.copied;


import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;


import org.eclipse.jface.text.FindReplaceDocumentAdapter;


import com.python.pydev.ui.search.SearchMessages;


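/**
 * Utility for turning search input into {@link Pattern} instances and for interpreting the escape
 * sequences of replace strings. The search input is either a regular expression or a string in the
 * 'StringMatcher' wildcard format ('*' and '?' are wildcards, '\' is the escape character).
 */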
public class PatternConstructor {


    /**
     * Private constructor: this class only offers static members and is not meant to be instantiated.
     */
    private PatternConstructor() {
        // don't instantiate
    }


    public static Pattern createPattern(String pattern, boolean isCaseSensitive, boolean isRegex)
            throws PatternSyntaxException {
        return createPattern(pattern, isRegex, true, isCaseSensitive, false);
    }


    /**
     * Creates a pattern element from the pattern string which is either a reg-ex expression or in our old
     * 'StringMatcher' format.
     * @param pattern The search pattern
     * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern
     * @param isStringMatcher <code>true</code> if the passed string is in the StringMatcher format.
     * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern
     * @param isWholeWord <code>true</code> to create a pattern that requires a word boundary at the beginning and the end.
     * @return The created pattern
     * @throws PatternSyntaxException
     */
    public static Pattern createPattern(String pattern, boolean isRegex, boolean isStringMatcher,
            boolean isCaseSensitive, boolean isWholeWord) throws PatternSyntaxException {
        if (isRegex) {
            pattern = substituteLinebreak(pattern);
            if (isWholeWord) {
                StringBuffer buffer = new StringBuffer(pattern.length() + 10);
                buffer.append("\\b(?:").append(pattern).append(")\\b"); //$NON-NLS-1$ //$NON-NLS-2$
                pattern = buffer.toString();
            }
        } else {
            int len = pattern.length();
            StringBuffer buffer = new StringBuffer(len + 10);
            // don't add a word boundary if the search text does not start with
            // a word char. (this works around a user input error).
            if (isWholeWord && len > 0 && isWordChar(pattern.charAt(0))) {
                buffer.append("\\b"); //$NON-NLS-1$
            }
            appendAsRegEx(isStringMatcher, pattern, buffer);
            if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) {
                buffer.append("\\b"); //$NON-NLS-1$
            }
            pattern = buffer.toString();
        }


        int regexOptions = Pattern.MULTILINE;
        if (!isCaseSensitive) {
            regexOptions |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
        }
        return Pattern.compile(pattern, regexOptions);
    }


    /**
     * Copied from {@link org.eclipse.jface.text.FindReplaceDocumentAdapter}' to support '\R' 
     * @param findString the string to substitute
     * @return the new string
     * @throws PatternSyntaxException
     */
    private static String substituteLinebreak(String findString) throws PatternSyntaxException {
        int length = findString.length();
        StringBuffer buf = new StringBuffer(length);


        int inCharGroup = 0;
        int inBraces = 0;
        boolean inQuote = false;
        for (int i = 0; i < length; i++) {
            char ch = findString.charAt(i);
            switch (ch) {
                case '[':
                    buf.append(ch);
                    if (!inQuote)
                        inCharGroup++;
                    break;


                case ']':
                    buf.append(ch);
                    if (!inQuote)
                        inCharGroup--;
                    break;


                case '{':
                    buf.append(ch);
                    if (!inQuote && inCharGroup == 0)
                        inBraces++;
                    break;


                case '}':
                    buf.append(ch);
                    if (!inQuote && inCharGroup == 0)
                        inBraces--;
                    break;


                case '\\':
                    if (i + 1 < length) {
                        char ch1 = findString.charAt(i + 1);
                        if (inQuote) {
                            if (ch1 == 'E')
                                inQuote = false;
                            buf.append(ch).append(ch1);
                            i++;


                        } else if (ch1 == 'R') {
                            if (inCharGroup > 0 || inBraces > 0) {
                                String msg = SearchMessages.PatternConstructor_error_line_delim_position;
                                throw new PatternSyntaxException(msg, findString, i);
                            }
                            buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
                            i++;


                        } else {
                            if (ch1 == 'Q') {
                                inQuote = true;
                            }
                            buf.append(ch).append(ch1);
                            i++;
                        }
                    } else {
                        buf.append(ch);
                    }
                    break;


                default:
                    buf.append(ch);
                    break;
            }


        }
        return buf.toString();
    }


    private static boolean isWordChar(char c) {
        return Character.isLetterOrDigit(c);
    }


    /**
     * Creates a pattern element from an array of patterns in the old
     * 'StringMatcher' format.
     * @param patterns The search patterns
     * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern
     * @return The created pattern
     * @throws PatternSyntaxException
     */
    public static Pattern createPattern(String[] patterns, boolean isCaseSensitive) throws PatternSyntaxException {
        StringBuffer pattern = new StringBuffer();
        for (int i = 0; i < patterns.length; i++) {
            if (i > 0) {
                // note that this works only as we know that the operands of the
                // or expression will be simple and need no brackets.
                pattern.append('|');
            }
            appendAsRegEx(true, patterns[i], pattern);
        }
        return createPattern(pattern.toString(), true, true, isCaseSensitive, false);
    }


    public static StringBuffer appendAsRegEx(boolean isStringMatcher, String pattern, StringBuffer buffer) {
        boolean isEscaped = false;
        for (int i = 0; i < pattern.length(); i++) {
            char c = pattern.charAt(i);
            switch (c) {
            // the backslash
                case '\\':
                    // the backslash is escape char in string matcher
                    if (isStringMatcher && !isEscaped) {
                        isEscaped = true;
                    } else {
                        buffer.append("\\\\"); //$NON-NLS-1$
                        isEscaped = false;
                    }
                    break;
                // characters that need to be escaped in the regex.
                case '(':
                case ')':
                case '{':
                case '}':
                case '.':
                case '[':
                case ']':
                case '$':
                case '^':
                case '+':
                case '|':
                    if (isEscaped) {
                        buffer.append("\\\\"); //$NON-NLS-1$
                        isEscaped = false;
                    }
                    buffer.append('\\');
                    buffer.append(c);
                    break;
                case '?':
                    if (isStringMatcher && !isEscaped) {
                        buffer.append('.');
                    } else {
                        buffer.append('\\');
                        buffer.append(c);
                        isEscaped = false;
                    }
                    break;
                case '*':
                    if (isStringMatcher && !isEscaped) {
                        buffer.append(".*"); //$NON-NLS-1$
                    } else {
                        buffer.append('\\');
                        buffer.append(c);
                        isEscaped = false;
                    }
                    break;
                default:
                    if (isEscaped) {
                        buffer.append("\\\\"); //$NON-NLS-1$
                        isEscaped = false;
                    }
                    buffer.append(c);
                    break;
            }
        }
        if (isEscaped) {
            buffer.append("\\\\"); //$NON-NLS-1$
            isEscaped = false;
        }
        return buffer;
    }


    /**
     * Interprets escaped characters in the given replace pattern.
     * 
     * @param replaceText the replace pattern
     * @param foundText the found pattern to be replaced
     * @param lineDelim the line delimiter to use for \R
     * @return a replace pattern with escaped characters substituted by the respective characters
     * @since 3.4
     */
    public static String interpretReplaceEscapes(String replaceText, String foundText, String lineDelim) {
        return new ReplaceStringConstructor(lineDelim).interpretReplaceEscapes(replaceText, foundText);
    }


    /**
     * Copied from {@link FindReplaceDocumentAdapter}.
     * 
     * FindReplaceDocumentAdapter with contributions from:
     * Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949
     * Cagatay Calli <ccalli@gmail.com> - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061
     */
    private static class ReplaceStringConstructor {


        // Retain-case modes: they control how interpretRetainCase(...) changes the case of the
        // characters it appends.
        private static final int RC_MIXED = 0; // append characters unchanged
        private static final int RC_UPPER = 1; // append every character upper-cased
        private static final int RC_LOWER = 2; // append every character lower-cased
        private static final int RC_FIRSTUPPER = 3; // upper-case the next character, then switch to RC_MIXED


        // Current retain-case mode; reset to RC_MIXED at the start of interpretReplaceEscapes(...)
        // and changed by the \C escape.
        private int fRetainCaseMode;
        // Text that is appended for the \R escape.
        private final String fLineDelim;


        /**
         * @param lineDelim the line delimiter that is appended wherever the replace text contains \R
         */
        public ReplaceStringConstructor(String lineDelim) {
            fLineDelim = lineDelim;


        }


        /**
         * Interprets escaped characters in the given replace pattern.
         * 
         * @param replaceText the replace pattern
         * @param foundText the found pattern to be replaced
         * @return a replace pattern with escaped characters substituted by the respective characters
         * @since 3.4
         */
        private String interpretReplaceEscapes(String replaceText, String foundText) {
            int length = replaceText.length();
            boolean inEscape = false;
            StringBuffer buf = new StringBuffer(length);


            /* every string we did not check looks mixed at first
             * so initialize retain case mode with RC_MIXED
             */
            fRetainCaseMode = RC_MIXED;


            for (int i = 0; i < length; i++) {
                final char ch = replaceText.charAt(i);
                if (inEscape) {
                    i = interpretReplaceEscape(ch, i, buf, replaceText, foundText);
                    inEscape = false;


                } else if (ch == '\\') {
                    inEscape = true;


                } else if (ch == '$') {
                    buf.append(ch);


                    /*
                     * Feature in java.util.regex.Matcher#replaceFirst(String):
                     * $00, $000, etc. are interpreted as $0 and
                     * $01, $001, etc. are interpreted as $1, etc. .
                     * If we support \0 as replacement pattern for capturing group 0,
                     * it would not be possible any more to write a replacement pattern
                     * that appends 0 to a capturing group (like $0\0).
                     * The fix is to interpret \00 and $00 as $0\0, and
                     * \01 and $01 as $0\1, etc.
                     */
                    if (i + 2 < length) {
                        char ch1 = replaceText.charAt(i + 1);
                        char ch2 = replaceText.charAt(i + 2);
                        if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') {
                            buf.append("0\\"); //$NON-NLS-1$
                            i++; // consume the 0
                        }
                    }
                } else {
                    interpretRetainCase(buf, ch);
                }
            }


            if (inEscape) {
                // '\' as last character is invalid, but we still add it to get an error message
                buf.append('\\');
            }
            return buf.toString();
        }


        /**
         * Interprets the escaped character <code>ch</code> at offset <code>i</code>
         * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>.
         * 
         * @param ch the escaped character
         * @param i the offset
         * @param buf the output buffer
         * @param replaceText the original replace pattern
         * @param foundText the found pattern to be replaced
         * @return the new offset
         * @since 3.4
         */
        private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) {
            int length = replaceText.length();
            switch (ch) {
                case 'r':
                    buf.append('\r');
                    break;
                case 'n':
                    buf.append('\n');
                    break;
                case 't':
                    buf.append('\t');
                    break;
                case 'f':
                    buf.append('\f');
                    break;
                case 'a':
                    buf.append('\u0007');
                    break;
                case 'e':
                    buf.append('\u001B');
                    break;
                case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
                    buf.append(fLineDelim);
                    break;
                /*
                 * \0 for octal is not supported in replace string, since it
                 * would conflict with capturing group \0, etc.
                 */
                case '0':
                    buf.append('$').append(ch);
                    /*
                     * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)"
                     * in interpretReplaceEscape(String) above.
                     */
                    if (i + 1 < length) {
                        char ch1 = replaceText.charAt(i + 1);
                        if ('0' <= ch1 && ch1 <= '9') {
                            buf.append('\\');
                        }
                    }
                    break;


                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    buf.append('$').append(ch);
                    break;


                case 'c':
                    if (i + 1 < length) {
                        char ch1 = replaceText.charAt(i + 1);
                        interpretRetainCase(buf, (char) (ch1 ^ 64));
                        i++;
                    } else {
                        String msg = SearchMessages.PatternConstructor_error_escape_sequence;
                        throw new PatternSyntaxException(msg, replaceText, i);
                    }
                    break;


                case 'x':
                    if (i + 2 < length) {
                        int parsedInt;
                        try {
                            parsedInt = Integer.parseInt(replaceText.substring(i + 1, i + 3), 16);
                            if (parsedInt < 0)
                                throw new NumberFormatException();
                        } catch (NumberFormatException e) {
                            String msg = SearchMessages.PatternConstructor_error_hex_escape_sequence;
                            throw new PatternSyntaxException(msg, replaceText, i);
                        }
                        interpretRetainCase(buf, (char) parsedInt);
                        i += 2;
                    } else {
                        String msg = SearchMessages.PatternConstructor_error_hex_escape_sequence;
                        throw new PatternSyntaxException(msg, replaceText, i);
                    }
                    break;


                case 'u':
                    if (i + 4 < length) {
                        int parsedInt;
                        try {
                            parsedInt = Integer.parseInt(replaceText.substring(i + 1, i + 5), 16);
                            if (parsedInt < 0)
                                throw new NumberFormatException();
                        } catch (NumberFormatException e) {
                            String msg = SearchMessages.PatternConstructor_error_unicode_escape_sequence;
                            throw new PatternSyntaxException(msg, replaceText, i);
                        }
                        interpretRetainCase(buf, (char) parsedInt);
                        i += 4;
                    } else {
                        String msg = SearchMessages.PatternConstructor_error_unicode_escape_sequence;
                        throw new PatternSyntaxException(msg, replaceText, i);
                    }
                    break;


                case 'C':
                    if (foundText.toUpperCase().equals(foundText)) // is whole match upper-case?
                        fRetainCaseMode = RC_UPPER;
                    else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case?
                        fRetainCaseMode = RC_LOWER;
                    else if (Character.isUpperCase(foundText.charAt(0))) // is first character upper-case?
                        fRetainCaseMode = RC_FIRSTUPPER;
                    else
                        fRetainCaseMode = RC_MIXED;
                    break;


                default:
                    // unknown escape k: append uninterpreted \k
                    buf.append('\\').append(ch);
                    break;
            }
            return i;
        }


        /**
         * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed)
         * and appends the character <code>ch</code> to <code>buf</code> after processing.
         * 
         * @param buf the output buffer
         * @param ch the character to process
         * @since 3.4
         */
        private void interpretRetainCase(StringBuffer buf, char ch) {
            if (fRetainCaseMode == RC_UPPER)
                buf.append(Character.toUpperCase(ch));
            else if (fRetainCaseMode == RC_LOWER)
                buf.append(Character.toLowerCase(ch));
            else if (fRetainCaseMode == RC_FIRSTUPPER) {
                buf.append(Character.toUpperCase(ch));
                fRetainCaseMode = RC_MIXED;
            } else
                buf.append(ch);
        }


    }
}
Source Code of com.python.pydev.refactoring.refactorer.search.copied.PatternConstructor

Related Classes of com.python.pydev.refactoring.refactorer.search.copied.PatternConstructor